/* ///////////////////////////////////////////////////////////////////////// */
/*  This is part of the source of the OMAP 5912 heterogeneous dual-core      */
/*  MPEG-4 SP video decoder published in ACM Transactions on Embedded        */
/*  Computing Systems, Vol. X, Issue Y.                                      */
/* ------------------------------------------------------------------------- */
/*  The source code is released under GPL license.                           */
/*                                                                           */
/*  Copyright, 2011                                                          */
/*  Multimedia Embedded Systems Labs                                         */
/*  Dept. of Computer Science                                                */
/*  National Chiao Tung University                                           */
/*  Hsinchu, Taiwan.                                                         */
/* ///////////////////////////////////////////////////////////////////////// */

/*
 *
 * This is an mpeg-4 simple profile video decoder based on the
 * xvid 0.9 snapshot on Apr/12/2002. The xvid library was redesigned
 * and rewritten quite a bit to fix conformance issues as well as to simplify
 * the code for easy porting for embedded applications.  Since Xvid is
 * covered by GPL, the modified source code of the library is
 * still in public domain.  You are free to redistribute and use the
 * source code following GPL guideline.
 *
 */

/* Memory-mapped MPU Timer1 registers (control, load value, current count). */
/* The accesses are volatile so the compiler can neither cache a previously */
/* read counter value nor discard a register write, and each expansion is   */
/* parenthesized so that it behaves as a single lvalue in any expression.   */
#define MPU_CNTL_TIMER1 (*(volatile uint32 *) 0xFFFEC500)
#define MPU_LOAD_TIMER1 (*(volatile uint32 *) 0xFFFEC504)
#define MPU_READ_TIMER1 (*(volatile uint32 *) 0xFFFEC508)

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "m4vdec_api.h"
#include "mbx_command.h"
#include <csl_intc.h>
#include <csl_timer.h>
#include <soc.h>
#include "mem_address.h"
#include "dual_core_config.h"

/* some device-dependent prototype functions (none is defined in this file) */

/* Called once by main() at start-up, right after interrupts are enabled.   */
/* NOTE(review): presumably programs the DSP MMU; the return value is       */
/* ignored by main() - confirm its meaning against the definition.          */
xint dspmmu_setup();

/* Called once by main() before decoding starts when LCD_OUT is defined.    */
/* NOTE(review): presumably initializes the LCD controller - confirm.       */
void lcd();

/* Sends one of the A2D_* commands (see "mbx_command.h") together with a    */
/* 16-bit data word to the DSP.  main() uses it to announce decoder         */
/* initialization, per-frame task initialization, display requests and      */
/* decoder release.  The return value is ignored by main().                 */
xint arm_announce(uint16 command, uint16 data);

/* Called by main() after clearing 'dsp_ready' and before busy-waiting on   */
/* that flag.  NOTE(review): presumably installs the handler that receives  */
/* DSP notifications - confirm against the definition.                      */
xint listen_dsp();

/* First call made by main().  NOTE(review): presumably enables the clocks  */
/* of the peripherals used below (timer, mailbox, LCD) - confirm.           */
void enable_peripheral_clk();

/* Build-time switches:                                                     */
/*   LCD_OUT                  - main() calls lcd() before decoding and      */
/*                              announces A2D_DISPLAY after every frame,    */
/*                              waiting for 'dsp_display_ok' each time.     */
/*   READ_BISTREAM_FROM_FLASH - the input bitstream is copied from the      */
/*                              memory at 'flash_addr' instead of being     */
/*                              read from the file 'fname'.                 */
#define LCD_OUT
#define READ_BISTREAM_FROM_FLASH

/* Input bitstream source.                                                  */
/* Flash mode: 'flash_addr' points at a 4-byte size header (main() builds   */
/*   the size from bytes 1..3, most significant first) that is followed     */
/*   directly by the bitstream data.                                        */
/* File mode: 'fname' is the input file name and 'fp' its stream; this can  */
/*   only be used in semi-hosted operation mode.  The file starts with the  */
/*   same 4-byte size header.                                               */
#ifdef READ_BISTREAM_FROM_FLASH
uint8 *flash_addr = (uint8 *) 0xC200000;
#else
char   *fname = "table_QVGA_150k_30f.m4f";
FILE   *fp;
#endif

/* Data buffers set up by main().                                           */
uint8  *bitbuf;         /* Input bitstream buffer (heap copy of the whole   */
                        /* stream); main() advances it frame by frame       */
uint8  *real_bitbuf;    /* Set to Bistream_address by main(); the frame     */
                        /* being decoded is copied here and handed to the   */
                        /* decoder as vdec_obj.bitstream                    */
xint    bitstream_size; /* bitstream size in bytes */
uint16 *yuvbuf;         /* output YCbCr images     */
xint    raw_video_size = 152064;  /* maximal raw frame size in bytes (CIF) */
                                  /* 152064 = 352 x 288 x 1.5               */

/* Profiling counters.  main() resets all four to zero just before decoding */
/* starts and prints them when decoding is finished; nothing in this file   */
/* increments them (presumably the task dispatcher and its ISR do - verify).*/
/* main() reports the two time values as "(value + 1) * 2" clock cycles.    */
uint32 task_init_time;
uint32 task_init_counter;
uint32 task_dispatch_isr_overhead;
uint32 task_dispatch_isr_counter;

/* Timer1 interrupt handler: reports that the one-shot timer used for       */
/* profiling has counted all the way down.  'arg' is not used.              */
void
timer_isr(void *arg)
{
    static const char underflow_msg[] = "Error: Timer1 underflow !\n";

    (void) arg;
    fputs(underflow_msg, stdout);
}

/* Flag used to signal decoding of an I or P frame to DSP.  main() sets it  */
/* to 0x0000 before the first frame and to 0x0100 (bit #8 of the DSP        */
/* mailbox data) after every decoded frame.                                 */
int is_pframe;

/* synchronization flags between ARM and DSP */
/* All four are volatile because main() busy-waits on them or resets them   */
/* while they are changed outside this file (presumably from the handler    */
/* for DSP notifications - verify).                                         */
volatile int arm_do = -1; /* The slice number ARM is currently decoding */
                          /* (main() resets it to -1 for every frame)   */
volatile int dsp_do = -1; /* The slice number DSP is currently decoding */
                          /* (main() resets it to -1 for every frame)   */
/* Non-zero while the current frame is not completely decoded; main()    */
/* spins until it becomes 0 and then sets it back to 1 for the next one. */
volatile int not_finishing_decoding_frame = 1;
/* main() clears this before announcing A2D_DISPLAY and then spins until */
/* it becomes non-zero.                                                  */
volatile int dsp_display_ok = 0;

/* Closes the semi-hosted input file if it is open; does nothing when the   */
/* bitstream is read from flash.                                            */
static void
close_bitstream_file(void)
{
#ifndef READ_BISTREAM_FROM_FLASH
    if (fp != NULL)
    {
        fclose(fp);
        fp = NULL;
    }
#endif
}

/*
 * ARM-side entry point of the dual-core decoder.
 *
 *  1. Enables peripheral clocks and interrupts, sets up the DSP MMU and
 *     configures Timer1 as a one-shot down counter used for profiling.
 *  2. Loads the complete bitstream (4-byte size header followed by the
 *     data) from flash or from a semi-hosted file into 'bitbuf'.
 *  3. Waits for the DSP, then decodes frame by frame.  Every frame is
 *     stored as a big-endian 16-bit size field followed by that many bytes
 *     and is copied to 'real_bitbuf' before it is decoded.
 *  4. Prints the profiling results.
 *
 * The command-line arguments are not used.
 * Returns 0 when the decoding loop was reached (even if the decoder could
 * not be initialized) and 1 on any input or memory error before that.
 */
int
main(int arc, char *arv[])
{
    DEC_CTRL vdec_obj;
    xint     code, decoded_size;
    xint     first_copy_size;
    uint32   default_count = 0xFFFFFFFF;
    uint32   frame_count = 0;       /* number of frames actually decoded */
    uint8    e_swap[4];
    const float mpu_clock_hz = (float) 96000000;

    CSL_TimerHandle timerHandle;
    CSL_TimerObj timerObj;
    CSL_TimerHwSetup timerHwSetup = CSL_TIMER_HWSETUP_DEFAULTS;

    uint16 this_frame_size;

    extern volatile uint16 dsp_ready;
    extern int arm_decode_slice_number;
    extern int dsp_decode_slice_number;
    uint32  tmp_count;
    uint32  display_count = 0;
    //extern int arm_to_dsp_mailbox_count;
    //arm_to_dsp_mailbox_count=0;

    (void) arc;
    (void) arv;

    enable_peripheral_clk();
    enable_global_interrupt();
    dspmmu_setup();

    /*Timer Initialization */
    CSL_timerInit(NULL);
    timerHandle = CSL_timerOpen(&timerObj, CSL_TIMER_1, NULL, NULL);

    /* configure timer */
    timerHwSetup.loadVal = 0xFFFFFFFF;
    timerHwSetup.loadMode = CSL_TIMER_LOADMODE_ONESHOT;
    timerHwSetup.emuMode = CSL_TIMER_EMUMODE_STOP;
    timerHwSetup.extClock = CSL_TIMER_EXTCLOCK_ENABLE;
    CSL_timerHwSetup(timerHandle, &timerHwSetup);

    /* determine the size of the video bitstream */
#ifdef READ_BISTREAM_FROM_FLASH
    memcpy(e_swap, flash_addr, 4);
#else
    if ((fp = fopen(fname, "rb")) == NULL)
    {
        printf("Cannot open '%s'.\n", fname);
        return 1;
    }
    if (fread(e_swap, 1, 4, fp) != 4)
    {
        printf("Cannot read '%s'.\n", fname);
        close_bitstream_file();
        return 1;
    }
#endif
    /* Only the three low-order bytes of the header carry the size. */
    bitstream_size = (e_swap[1]<<16) + (e_swap[2]<<8) + e_swap[3];

    /* Reject an implausible size before anything is allocated or copied. */
    if (bitstream_size < 4)
    {
        printf("Bitstream data error.\n");
        close_bitstream_file();
        return 1;
    }

    /* allocating the data buffers for input bitstream and output frame */
    if ((bitbuf = (uint8 *) malloc(bitstream_size)) == NULL)
    {
        printf("Out of memory when allocating 'bitbuf'.\n");
        close_bitstream_file();
        return 1;
    }

    if ((yuvbuf = (uint16 *) malloc(raw_video_size)) == NULL)
    {
        printf("Out of memory when allocating 'yuvbuf'.\n");
        close_bitstream_file();
        return 1;
    }

    /* read video bitstream */
#ifdef READ_BISTREAM_FROM_FLASH
    printf("Reading input bitstream from flash ...\n");
    memcpy(bitbuf, flash_addr + 4, bitstream_size);
#else
    printf("Reading input bitstream from semihosted harddrive ...\n");
    if (fread((void *) bitbuf, 1, bitstream_size, fp) != (size_t) bitstream_size)
    {
        /* A short read would leave part of 'bitbuf' uninitialized. */
        printf("Cannot read '%s'.\n", fname);
        close_bitstream_file();
        return 1;
    }
    close_bitstream_file();
#endif

    real_bitbuf = (uint8 *) (Bistream_address);

#ifdef LCD_OUT
    lcd();
#endif

    printf("Waiting for DSP ...\n");
    dsp_ready = 0;
    listen_dsp();

    /* Frame bitstream size field is recorded in big-endian format, */
    /* and it is 16-bit word-aligned. The size is in bytes,         */
    /* excluding the two-byte size field.                           */
    this_frame_size = (((bitbuf[0]) << 8) | bitbuf[1]);

    not_finishing_decoding_frame = 1;
    dsp_display_ok = 0;
    arm_do = -1;
    dsp_do = -1;
    is_pframe = 0x0000; /* set bit #8 of the DSP mailbox data to 0 */

    /* + 8 is for trailing header; never copy more than the stream holds */
    first_copy_size = this_frame_size + 2 + 8;
    if (first_copy_size > bitstream_size)
    {
        first_copy_size = bitstream_size;
    }
    memcpy(real_bitbuf, bitbuf, first_copy_size);

    while (!dsp_ready);

    printf("Start decoding ...\n");

    task_init_time = 0;
    task_init_counter = 0;
    task_dispatch_isr_overhead = 0;
    task_dispatch_isr_counter = 0;

    /* start the profiling timer */
    MPU_LOAD_TIMER1 = default_count;
    MPU_READ_TIMER1 = default_count;
    MPU_CNTL_TIMER1 |= 1;

    arm_announce(A2D_INITIALIZE_DEOCDER, this_frame_size + 2);

    /* decode video header to retrieve video frame width and height */
    if ((code = m4v_init_decoder(&vdec_obj, real_bitbuf, 64)) == 0)
    {
        vdec_obj.image = yuvbuf;
        vdec_obj.stride = vdec_obj.width;
        vdec_obj.bitstream = real_bitbuf;

        decoded_size = 0;
        while (decoded_size < bitstream_size)
        {
            vdec_obj.length = this_frame_size + 2 + 4;

            /* send signals to both ARM and DSP so that  */
            /* parallel decoding of current frame begins */
            m4v_decode_frame(&vdec_obj);
            frame_count++;

            /* When we reach this point, it means ARM has finished */
            /* decoding its last slice of current frame, but DSP   */
            /* may still be decoding its last slice.               */

            /* ARM can begin preparing for decoding of next frame  */
            bitbuf += (this_frame_size + 2);
            decoded_size += (this_frame_size + 2);

            /* size field is 16-bit word-aligned, we must skip */
            /* the padding byte if there is one.  'bitbuf'     */
            /* started at an aligned malloc() address, so the  */
            /* parity of the offset equals that of the pointer.*/
            if ((decoded_size % 2) != 0)
            {
                decoded_size++;
                bitbuf++;
            }

            /* Fetch the next frame only if there is one; after the */
            /* last frame 'bitbuf' points past the end of the data. */
            if (decoded_size < bitstream_size)
            {
                this_frame_size = ((bitbuf[0]) << 8) | bitbuf[1];
                memcpy(real_bitbuf, bitbuf, this_frame_size + 2);

                /* Adding a trailing header */
                memset(real_bitbuf + this_frame_size + 2, 0, 4);
                real_bitbuf[this_frame_size + 2 + 3] = 1;
            }

            /* busy waiting for DSP to finish decoding of current frame. */
            while (not_finishing_decoding_frame) ;

            is_pframe = 0x0100; /* set bit #8 of the DSP mailbox data to 1 */
            arm_do = -1;
            dsp_do = -1;
            not_finishing_decoding_frame = 1;

#ifdef LCD_OUT
            /* Timer1 counts down, so (start - end) is the elapsed time. */
            dsp_display_ok = 0;
            tmp_count = MPU_READ_TIMER1;
            arm_announce(A2D_DISPLAY, 0);
            while (dsp_display_ok == 0);
            display_count += (tmp_count - MPU_READ_TIMER1);
#endif

#ifndef ARM_ONLY
            if (decoded_size < bitstream_size)
            {
                arm_announce(A2D_INITIALIZE_TASK, this_frame_size + 2);
            }
#endif
        }

        m4v_free_decoder(&vdec_obj);
        arm_announce(A2D_RELEASE_DECODER, 0);
    }
    else
    {
        printf("Cannot decode bitstream, code = %d.\n", (int) code);
    }

    /* stop the timer; counts are converted to cycles as "(count + 1) * 2" */
    tmp_count = MPU_READ_TIMER1;
    MPU_CNTL_TIMER1 &= ~1;
    tmp_count = (default_count - tmp_count + 1) * 2;
    display_count = (display_count + 1) * 2;
    printf("\nFinished decoding ...\n");
    printf("Total Clock Cycles:%lu\n", (unsigned long) tmp_count);
    printf("Color conversion and display clock :%lu\n",
           (unsigned long) display_count);
    /* FPS = decoded frames / elapsed seconds at the MPU clock rate */
    printf("Decoding FPS(With Color Conversion and Display):%.2f\n",
           (float) frame_count / (float) (((float) tmp_count / mpu_clock_hz)));
    printf("Decoding FPS(Without Color Conversion and Display):%.2f\n",
           (float) frame_count /
           ((float) (tmp_count - display_count) / mpu_clock_hz));
    //printf("arm_to_dsp_mailbox_count=%d\n", arm_to_dsp_mailbox_count);
    printf("Task Partition Number: ");
    printf("ARM: %d", arm_decode_slice_number);
    printf(", DSP: %d\n", dsp_decode_slice_number);
    printf("ISR overhead: %lu cycles for %lu times\n",
           (unsigned long) ((task_dispatch_isr_overhead + 1) * 2),
           (unsigned long) task_dispatch_isr_counter);
    printf("Task initialization overhead: %lu cycles for %lu frames\n",
           (unsigned long) ((task_init_time + 1) * 2),
           (unsigned long) task_init_counter);

    return 0;
}
